import pandas as pd
import numpy as np
import missingno as msno
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
import statsmodels.api as sm
import math
import plotly.express as px #graphing
import plotly.graph_objects as go #graphing
from plotly.subplots import make_subplots #graphing
from datetime import datetime, timedelta
#!pip install missingno
# Directory that holds the exercise data.
path = "/Users/ycq/Downloads/Principal/"
# Build the file path with os.path.join: `path` already ends in "/", so the
# previous `path + "/Quant Exercise.csv"` produced a doubled separator.
df = pd.read_csv(os.path.join(path, "Quant Exercise.csv"))
# First rows, column/dtype summary (printed by info(), which returns None) and shape.
df.head(), df.info(), df.shape
<class 'pandas.core.frame.DataFrame'> RangeIndex: 68 entries, 0 to 67 Columns: 551 entries, DP03_0001E to fips dtypes: float64(300), int64(249), object(2) memory usage: 292.8+ KB
( DP03_0001E DP03_0001M DP03_0001PE DP03_0001PM DP03_0002E DP03_0002M \
0 84387 137 84387.0 NaN 53245.0 910
1 1015608 631 1015608.0 NaN 660484.0 3044
2 54444 101 54444.0 NaN 31615.0 519
3 136682 202 136682.0 NaN 84153.0 1035
4 40064 86 40064.0 NaN 23144.0 447
DP03_0002PE DP03_0002PM DP03_0003E DP03_0003M ... DP03_0136M \
0 63.1 1.1 53200 911 ... NaN
1 65.0 0.3 659555 3038 ... NaN
2 58.1 1.0 31593 518 ... NaN
3 61.6 0.7 84051 1040 ... NaN
4 57.8 1.1 23140 447 ... NaN
DP03_0136PE DP03_0136PM DP03_0137E DP03_0137M DP03_0137PE DP03_0137PM \
0 6.2 0.8 NaN NaN 17.4 1.8
1 7.7 0.4 NaN NaN 21.3 0.7
2 8.4 1.1 NaN NaN 23.8 2.2
3 6.8 0.7 NaN NaN 20.8 1.3
4 8.0 1.2 NaN NaN 25.0 3.2
GEO_ID county fips
0 0500000US42001 Adams 42001.0
1 0500000US42003 Allegheny 42003.0
2 0500000US42005 Armstrong 42005.0
3 0500000US42007 Beaver 42007.0
4 0500000US42009 Bedford 42009.0
[5 rows x 551 columns],
None,
(68, 551))
# Summary statistics, the column index, and the number of missing values per column.
df.describe(), df.columns, df.isna().sum()
( DP03_0001E DP03_0001M DP03_0001PE DP03_0001PM DP03_0002E \
count 6.800000e+01 68.000000 6.800000e+01 0.0 6.700000e+01
mean 3.075309e+05 263.514706 3.075378e+05 NaN 1.927388e+05
std 1.268587e+06 336.222496 1.268588e+06 NaN 8.033616e+05
min 3.880000e+03 30.000000 3.880000e+03 NaN 1.493000e+03
25% 3.499550e+04 95.750000 3.499550e+04 NaN 1.942900e+04
50% 7.116700e+04 157.500000 7.116700e+04 NaN 3.991700e+04
75% 1.809575e+05 324.500000 1.809575e+05 NaN 1.047060e+05
max 1.045605e+07 2462.000000 1.045605e+07 NaN 6.566126e+06
DP03_0002M DP03_0002PE DP03_0002PM DP03_0003E DP03_0003M \
count 68.000000 68.000000 68.000000 6.800000e+01 68.000000
mean 1165.588235 58.964706 1.079412 1.928849e+05 1165.411765
std 1557.778070 6.400088 0.640792 7.963784e+05 1548.812335
min 119.000000 21.500000 0.100000 1.493000e+03 119.000000
25% 443.250000 56.125000 0.700000 1.961725e+04 442.750000
50% 800.500000 59.400000 1.000000 4.101200e+04 801.500000
75% 1426.250000 62.850000 1.300000 1.122075e+05 1430.250000
max 11721.000000 68.300000 4.400000 6.558087e+06 11636.000000
... DP03_0135PM DP03_0136E DP03_0136M DP03_0136PE DP03_0136PM \
count ... 68.000000 0.0 0.0 68.000000 68.000000
mean ... 1.447059 NaN NaN 8.836765 1.242647
std ... 0.749023 NaN NaN 2.831639 0.684436
min ... 0.200000 NaN NaN 3.700000 0.200000
25% ... 0.800000 NaN NaN 6.800000 0.800000
50% ... 1.400000 NaN NaN 8.500000 1.200000
75% ... 1.700000 NaN NaN 10.025000 1.500000
max ... 3.800000 NaN NaN 20.400000 4.200000
DP03_0137E DP03_0137M DP03_0137PE DP03_0137PM fips
count 0.0 0.0 68.000000 68.000000 67.000000
mean NaN NaN 23.795588 2.413235 42067.000000
std NaN NaN 4.925518 1.290251 38.970074
min NaN NaN 15.300000 0.300000 42001.000000
25% NaN NaN 20.725000 1.475000 42034.000000
50% NaN NaN 23.300000 2.200000 42067.000000
75% NaN NaN 25.725000 2.900000 42100.000000
max NaN NaN 44.300000 7.000000 42133.000000
[8 rows x 549 columns],
Index(['DP03_0001E', 'DP03_0001M', 'DP03_0001PE', 'DP03_0001PM', 'DP03_0002E',
'DP03_0002M', 'DP03_0002PE', 'DP03_0002PM', 'DP03_0003E', 'DP03_0003M',
...
'DP03_0136M', 'DP03_0136PE', 'DP03_0136PM', 'DP03_0137E', 'DP03_0137M',
'DP03_0137PE', 'DP03_0137PM', 'GEO_ID', 'county', 'fips'],
dtype='object', length=551),
DP03_0001E 0
DP03_0001M 0
DP03_0001PE 0
DP03_0001PM 68
DP03_0002E 1
..
DP03_0137PE 0
DP03_0137PM 0
GEO_ID 0
county 0
fips 1
Length: 551, dtype: int64)
# Show the dtype pandas inferred for each column.
df.dtypes
DP03_0001E int64
DP03_0001M int64
DP03_0001PE float64
DP03_0001PM float64
DP03_0002E float64
...
DP03_0137PE float64
DP03_0137PM float64
GEO_ID object
county object
fips float64
Length: 551, dtype: object
I renamed the columns that I wanted to use, building a dictionary of key-value pairs that maps each original column header to a descriptive name taken from the U.S. Census Bureau website.
FIPS is a five-digit Federal Information Processing Standards code which uniquely identifies counties in the United States. I use FIPS along with geojson to create Choropleth Maps.
# Mapping of original column headers to descriptive names.
# All of the renamed columns are ESTIMATES from the U.S. Census Bureau.
# Columns not renamed include: Percent (PE), Margin of Error (M), Percent Margin of Error (PM).
# The mapping is deliberately NOT called `dict`: that name would shadow the builtin
# type for every statement that runs afterwards.
column_renames = {
    # Employment Status
    # Population 16 years and over
    "DP03_0001E": "total_population",  # Total population eligible for work
    "DP03_0002E": "labor_force",
    "DP03_0003E": "civ_labor_force",
    "DP03_0004E": "total_employed",
    "DP03_0005E": "total_unemployed",
    "DP03_0006E": "armed_forces",
    "DP03_0007E": "not_in_labor_force",
    # Females 16 years and over
    "DP03_0010E": "total_population_female",  # Total population eligible for work
    "DP03_0011E": "labor_force_female",
    "DP03_0012E": "civ_labor_force_female",
    "DP03_0013E": "civ_labor_force_female_employed",
    # Households with children
    "DP03_0014E": "household_children_under_6",  # Own children of the householder under 6 years
    # All parents in family in labor force
    "DP03_0015E": "parents_work_children_under_6",  # Own children of the householder under 6 years
    "DP03_0016E": "household_children_6to17",  # Own children of the householder 6 to 17 years
    # All parents in family in labor force
    "DP03_0017E": "parents_work_children_6to17",  # Own children of the householder 6 to 17 years
    # Commuting to work
    "DP03_0018E": "total_workers_commute",
    "DP03_0019E": "solo_vehicle_commute",  # Car, truck, or van -- drove alone
    "DP03_0020E": "carpool_commute",  # Car, truck, or van -- carpooled
    "DP03_0021E": "public_transportation_commute",  # Public transportation (excluding taxicab)
    "DP03_0022E": "walked_commute",
    "DP03_0023E": "other_means_commute",
    "DP03_0024E": "worked_from_home",
    "DP03_0025E": "mean_commute_time_minutes",
    # Occupation
    "DP03_0027E": "manage_business_sci_art",  # Management, business, science, and arts occupations
    "DP03_0028E": "service_occupations",
    "DP03_0029E": "sales_and_office_occupations",
    # Natural resources, construction, and maintenance occupations
    "DP03_0030E": "nr_construction_and_maintenance",
    # Production, transportation, and material moving occupations
    "DP03_0031E": "production_transportation_mm",
    # Industry
    "DP03_0033E": "ag_forest_fish_hunt_mine",  # Agriculture, forestry, fishing and hunting, and mining
    "DP03_0034E": "construction",
    "DP03_0035E": "manufacturing",
    "DP03_0036E": "wholesale_trade",
    "DP03_0037E": "retail_trade",
    "DP03_0038E": "transportation_warehousing_utilities",
    "DP03_0039E": "information",
    "DP03_0040E": "firerl",  # Finance, insurance, real estate, rental and leasing
    # Professional, scientific, and management, and administrative and waste management services
    "DP03_0041E": "psmawms",
    # Educational services, and health care and social assistance
    "DP03_0042E": "education_health_care_social",
    # Arts, entertainment, and recreation, and accommodation and food services
    "DP03_0043E": "art_entertainment_accommodation",
    "DP03_0044E": "other_services",  # Other services, except public administration
    "DP03_0045E": "public_administration",
    # Class of worker
    "DP03_0047E": "private_wage_and_salary_worker",
    "DP03_0048E": "government_worker",
    "DP03_0049E": "self_employed_worker",  # Self-employed in own not incorporated business workers
    "DP03_0050E": "unpaid_family_worker",
    # Income and benefits (in 2020 inflation-adjusted dollars)
    # Total households
    "DP03_0051E": "total_households",
    "DP03_0052E": "household_less_than_10k",
    "DP03_0053E": "household_10k_to_15k",  # $10,000 to $14,999
    "DP03_0054E": "household_15k_to_25k",  # $15,000 to $24,999
    "DP03_0055E": "household_25k_to_35k",  # $25,000 to $34,999
    "DP03_0056E": "household_35k_to_50k",  # $35,000 to $49,999
    "DP03_0057E": "household_50k_to_75k",  # $50,000 to $74,999
    "DP03_0058E": "household_75k_to_100k",  # $75,000 to $99,999
    "DP03_0059E": "household_100k_to_150k",  # $100,000 to $149,999
    "DP03_0060E": "household_150k_to_200k",  # $150,000 to $199,999
    "DP03_0061E": "household_200k_plus",  # $200,000 or more
    "DP03_0062E": "household_median_income",  # dollars
    "DP03_0063E": "household_mean_income",  # dollars
    # Families
    "DP03_0075E": "total_families",
    "DP03_0076E": "family_less_than_10k",
    "DP03_0077E": "family_10k_to_15k",  # $10,000 to $14,999
    "DP03_0078E": "family_15k_to_25k",  # $15,000 to $24,999
    "DP03_0079E": "family_25k_to_35k",  # $25,000 to $34,999
    "DP03_0080E": "family_35k_to_50k",  # $35,000 to $49,999
    "DP03_0081E": "family_50k_to_75k",  # $50,000 to $74,999
    "DP03_0082E": "family_75k_to_100k",  # $75,000 to $99,999
    "DP03_0083E": "family_100k_to_150k",  # $100,000 to $149,999
    "DP03_0084E": "family_150k_to_200k",  # $150,000 to $199,999
    "DP03_0085E": "family_200k_plus",  # $200,000 or more
    "DP03_0086E": "family_median_income",  # dollars
    "DP03_0087E": "family_mean_income",  # dollars
    "DP03_0088E": "per_capita_income",
    # Nonfamily Households
    "DP03_0089E": "total_nonfamily_households",
    "DP03_0090E": "nonfamily_median_income",  # dollars
    "DP03_0091E": "nonfamily_mean_income",  # dollars
    # Median Earnings
    "DP03_0092E": "median_earnings_for_workers",  # dollars
    "DP03_0093E": "median_earnings_male_fulltime",  # dollars
    "DP03_0094E": "median_earnings_female_fulltime",  # dollars
    # Health Insurance Coverage
    "DP03_0095E": "total_civ_population",  # Total Civilian Noninstitutionalized Population
    "DP03_0096E": "civ_health_insurance_coverage",  # Population
    "DP03_0097E": "civ_private_health_insurance",  # Population
    "DP03_0098E": "civ_public_health_insurance",  # Population
    "DP03_0099E": "civ_no_health_insurance",  # Population
}
# Apply the descriptive names, then drop the columns with all missing values.
df = df.rename(columns=column_renames).dropna(axis=1, how="all")
df.head()
| total_population | DP03_0001M | DP03_0001PE | labor_force | DP03_0002M | DP03_0002PE | DP03_0002PM | civ_labor_force | DP03_0003M | DP03_0003PE | ... | DP03_0134PM | DP03_0135PE | DP03_0135PM | DP03_0136PE | DP03_0136PM | DP03_0137PE | DP03_0137PM | GEO_ID | county | fips | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 84387 | 137 | 84387.0 | 53245.0 | 910 | 63.1 | 1.1 | 53200 | 911 | 63.0 | ... | 0.8 | 5.7 | 0.9 | 6.2 | 0.8 | 17.4 | 1.8 | 0500000US42001 | Adams | 42001.0 |
| 1 | 1015608 | 631 | 1015608.0 | 660484.0 | 3044 | 65.0 | 0.3 | 659555 | 3038 | 64.9 | ... | 0.3 | 8.5 | 0.6 | 7.7 | 0.4 | 21.3 | 0.7 | 0500000US42003 | Allegheny | 42003.0 |
| 2 | 54444 | 101 | 54444.0 | 31615.0 | 519 | 58.1 | 1.0 | 31593 | 518 | 58.0 | ... | 1.0 | 9.2 | 1.6 | 8.4 | 1.1 | 23.8 | 2.2 | 0500000US42005 | Armstrong | 42005.0 |
| 3 | 136682 | 202 | 136682.0 | 84153.0 | 1035 | 61.6 | 0.7 | 84051 | 1040 | 61.5 | ... | 0.7 | 7.2 | 0.9 | 6.8 | 0.7 | 20.8 | 1.3 | 0500000US42007 | Beaver | 42007.0 |
| 4 | 40064 | 86 | 40064.0 | 23144.0 | 447 | 57.8 | 1.1 | 23140 | 447 | 57.8 | ... | 1.6 | 9.4 | 1.6 | 8.0 | 1.2 | 25.0 | 3.2 | 0500000US42009 | Bedford | 42009.0 |
5 rows × 460 columns
# Last rows of the frame plus the number of distinct county names.
# `nunique` must be called; the bare attribute only displays the bound method.
df.tail(), df.county.nunique()
( total_population DP03_0001M DP03_0001PE labor_force DP03_0002M \
63 43821 104 43821.0 22241.0 767
64 294500 356 294500.0 179495.0 1512
65 22487 77 22487.0 13226.0 319
66 360718 453 360718.0 237353.0 1629
67 10456049 2462 10456049.0 6566126.0 11721
DP03_0002PE DP03_0002PM civ_labor_force DP03_0003M DP03_0003PE ... \
63 50.8 1.7 22229 766 50.7 ...
64 60.9 0.5 179459 1511 60.9 ...
65 58.8 1.4 13217 318 58.8 ...
66 65.8 0.5 237053 1634 65.7 ...
67 62.8 0.1 6558087 11636 62.7 ...
DP03_0134PM DP03_0135PE DP03_0135PM DP03_0136PE DP03_0136PM \
63 1.7 6.4 1.5 7.4 1.6
64 0.5 7.2 0.7 6.4 0.5
65 1.4 8.1 1.7 7.8 1.3
66 0.5 6.8 0.8 6.3 0.5
67 0.1 8.2 0.2 9.0 0.2
DP03_0137PE DP03_0137PM GEO_ID county fips
63 24.8 2.8 0500000US42127 Wayne 42127.0
64 21.4 1.2 0500000US42129 Westmoreland 42129.0
65 21.9 2.7 0500000US42131 Wyoming 42131.0
66 20.0 1.4 0500000US42133 York 42133.0
67 23.3 0.3 0400000US42 Pennsylvania NaN
[5 rows x 460 columns],
<bound method IndexOpsMixin.nunique of 0 Adams
1 Allegheny
2 Armstrong
3 Beaver
4 Bedford
...
63 Wayne
64 Westmoreland
65 Wyoming
66 York
67 Pennsylvania
Name: county, Length: 68, dtype: object>)
# The row whose county is "Pennsylvania" is the statewide aggregate, not a county:
# keep it in df0 for state-level charts and exclude it from the county-level frame.
is_state_total = df["county"] == "Pennsylvania"
# .copy() gives each frame its own data, so the derived columns assigned to df
# further down are not written to a slice of the original frame
# (avoids pandas' SettingWithCopyWarning).
df0 = df[is_state_total].copy()
df = df[~is_state_total].copy()
print(df.county.unique())
['Adams' 'Allegheny' 'Armstrong' 'Beaver' 'Bedford' 'Berks' 'Blair' 'Bradford' 'Bucks' 'Butler' 'Cambria' 'Cameron' 'Carbon' 'Centre' 'Chester' 'Clarion' 'Clearfield' 'Clinton' 'Columbia' 'Crawford' 'Cumberland' 'Dauphin' 'Delaware' 'Elk' 'Erie' 'Fayette' 'Forest' 'Franklin' 'Fulton' 'Greene' 'Huntingdon' 'Indiana' 'Jefferson' 'Juniata' 'Lackawanna' 'Lancaster' 'Lawrence' 'Lebanon' 'Lehigh' 'Luzerne' 'Lycoming' 'McKean' 'Mercer' 'Mifflin' 'Monroe' 'Montgomery' 'Montour' 'Northampton' 'Northumberland' 'Perry' 'Philadelphia' 'Pike' 'Potter' 'Schuylkill' 'Snyder' 'Somerset' 'Sullivan' 'Susquehanna' 'Tioga' 'Union' 'Venango' 'Warren' 'Washington' 'Wayne' 'Westmoreland' 'Wyoming' 'York']
# Default size for the matplotlib figures created from here on.
plt.rcParams["figure.figsize"] = (12, 8)

# County GeoJSON consumed by the Plotly choropleth maps below.
from urllib.request import urlopen
import json

COUNTY_GEOJSON_URL = "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
with urlopen(COUNTY_GEOJSON_URL) as response:
    counties = json.loads(response.read())
# Pennsylvania Household Income and Benefits
# Matplotlib 3.6 deprecated the bundled "seaborn-*" style names in favour of
# "seaborn-v0_8-*"; use the new name when it exists and fall back to the old
# one on earlier releases.
dark_style = "seaborn-v0_8-dark" if "seaborn-v0_8-dark" in plt.style.available else "seaborn-dark"
plt.style.use(dark_style)

# Household income brackets, lowest to highest; one bar is drawn per bracket.
household_income_brackets = [
    "household_less_than_10k", "household_10k_to_15k", "household_15k_to_25k",
    "household_25k_to_35k", "household_35k_to_50k", "household_50k_to_75k",
    "household_75k_to_100k", "household_100k_to_150k", "household_150k_to_200k",
    "household_200k_plus",
]
# df0 holds only the statewide "Pennsylvania" row, so the chart has a single group.
ax = df0[["county"] + household_income_brackets].plot(x="county", kind="bar", cmap="Spectral")
plt.grid(axis="y", alpha=0.3)
# Horizontal tick label (the former rotation of 360 degrees is the same orientation).
ax.set_xticklabels(ax.get_xticklabels(), rotation=0, fontsize=20)
plt.title("Distribution of Pennsylvania Total Household Income", fontsize=25)
# Anchor the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.02, 1), loc=2, borderaxespad=0, fontsize=15)
/var/folders/2r/y399hm9d0hv3ysx7v92kl9l80000gn/T/ipykernel_64870/2960315682.py:3: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.
plt.style.use('seaborn-dark')
<matplotlib.legend.Legend at 0x7fdc100fd840>
# Share of households whose income is below $50,000, mapped by county.
low_income_brackets = [
    "household_less_than_10k",
    "household_10k_to_15k",
    "household_15k_to_25k",
    "household_25k_to_35k",
    "household_35k_to_50k",
]
# Add the bracket counts together, then express the total as a percentage of all households.
df["household_less_than_50k"] = sum(df[bracket] for bracket in low_income_brackets)
df["household_less_than_50k_percentage"] = df["household_less_than_50k"] / df["total_households"] * 100

fig = px.choropleth_mapbox(
    df,
    geojson=counties,
    locations="fips",
    color="household_less_than_50k_percentage",
    color_continuous_scale="Reds",
    mapbox_style="carto-darkmatter",
    zoom=6.25,
    center={"lat": 41, "lon": -77.65},
    hover_name="county",
    labels={"household_less_than_50k_percentage": "% Household < $50,000 💰"},
)
# Remove the outer margins so the map fills the figure, and apply the light template.
fig.update_layout(margin={side: 0 for side in "rtlb"}, template="plotly_white")
fig.show()
# Share of households whose income is $100,000 or more, mapped by county.
high_income_brackets = [
    "household_100k_to_150k",
    "household_150k_to_200k",
    "household_200k_plus",
]
# Add the bracket counts together, then express the total as a percentage of all households.
df["household_100k_plus"] = sum(df[bracket] for bracket in high_income_brackets)
df["household_100k_plus_percentage"] = df["household_100k_plus"] / df["total_households"] * 100

fig = px.choropleth_mapbox(
    df,
    geojson=counties,
    locations="fips",
    color="household_100k_plus_percentage",
    color_continuous_scale="Greens",
    mapbox_style="carto-darkmatter",
    zoom=6.25,
    center={"lat": 41, "lon": -78},
    hover_name="county",
    labels={"household_100k_plus_percentage": "% Household > $100,000 💰"},
)
# Remove the outer margins so the map fills the figure, and apply the light template.
fig.update_layout(margin={side: 0 for side in "rtlb"}, template="plotly_white")
fig.show()
# Total Employment Rate
# The employed count (DP03_0004E) is the employed part of the CIVILIAN labor force
# (DP03_0003E) in ACS table DP03, so the civilian labor force is the matching
# denominator. The total labor force (DP03_0002E) also includes the armed forces
# and is missing for one county in this file, which left that county blank on the map.
df["employment_rate"] = (df["total_employed"] / df["civ_labor_force"]) * 100
fig = px.choropleth_mapbox(
    df,
    geojson=counties,
    locations="fips",
    color="employment_rate",
    color_continuous_scale="RdBu_r",
    # Fixed colour range so the scale is not driven by the extreme counties.
    range_color=(91.5, 97.5),
    mapbox_style="carto-darkmatter",
    zoom=6.25,
    center={"lat": 41, "lon": -77.65},
    hover_name="county",
    hover_data=["total_employed", "total_unemployed"],
    labels={
        "employment_rate": "Employment Rate",
        "total_employed": "Total Employed",
        "total_unemployed": "Total Unemployed",
    },
)
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.update_layout(template="plotly_dark")
fig.show()
# Employment Rate Percentage by County
# Male figures are derived as total minus female.
df["labor_force_male"] = df["labor_force"] - df["labor_force_female"]
df["civ_labor_force_male"] = df["civ_labor_force"] - df["civ_labor_force_female"]
df["total_male_employed"] = df["total_employed"] - df["civ_labor_force_female_employed"]
# Both rates use the civilian labor force as the denominator so they are comparable:
# the employed counts are civilian, and the female rate was already civilian-based,
# whereas the male rate used to divide by the total labor force (armed forces included).
df["employment_rate_male"] = (df["total_male_employed"] / df["civ_labor_force_male"]) * 100
df["employment_rate_female"] = (df["civ_labor_force_female_employed"] / df["civ_labor_force_female"]) * 100

plt.style.use("Solarize_Light2")
male_rate = df.employment_rate_male
female_rate = df.employment_rate_female
overall_rate = df.employment_rate
county_names = df.county
plt.figure(figsize=(6, 14), dpi=80)
# One marker per county and sex; the dashed line traces the overall rate.
plt.scatter(male_rate, county_names, color="#0000FF", edgecolors="#000000", s=50, alpha=0.75,
            label="Male Employment Rate %")
plt.scatter(female_rate, county_names, color="#FF00FF", edgecolors="#000000", s=50, alpha=0.75,
            label="Female Employment Rate %")
plt.plot(overall_rate, county_names, color="#000000", alpha=0.5, linestyle="dashed",
         label="Total Employment Rate %")
plt.grid(color="#d3d3d3", linestyle="-", linewidth=0.75)
plt.title("Employment Rate % by County")
plt.xlabel("Employment Rate Percentage")
plt.ylabel("")
plt.legend(loc=2)
plt.show()
# Pennsylvania Median Earnings by County
plt.style.use("Solarize_Light2")
county_names = df.county
# (column, marker styling, connecting-line styling) for each earnings series,
# listed in the order they are drawn.
earnings_series = [
    ("median_earnings_for_workers",
     {"color": "#000000", "alpha": 1, "s": 12, "label": "All Workers"},
     {"color": "#000000", "alpha": 0.75}),
    ("median_earnings_male_fulltime",
     {"color": "#0000FF", "edgecolors": "#000000", "label": "Male Full Time"},
     {"color": "#0000FF", "alpha": 0.75, "linestyle": "--"}),
    ("median_earnings_female_fulltime",
     {"color": "#FF00FF", "edgecolors": "#000000", "label": "Female Full Time"},
     {"color": "#FF00FF", "alpha": 0.75, "linestyle": "--"}),
]
plt.figure(figsize=(8, 14), dpi=80)
for column, marker_style, line_style in earnings_series:
    # Markers first, then the line joining them, for each series in turn.
    plt.scatter(df[column], county_names, **marker_style)
    plt.plot(df[column], county_names, **line_style)
plt.grid(color="#d3d3d3", linestyle="-", linewidth=2)
plt.title("Pennsylvania Median Earnings by County")
plt.xlabel("Median Earnings (dollars)")
plt.ylabel("")
plt.legend(loc=1)
plt.show()
# Pennsylvania Per Capita Income by County
# Matplotlib 3.6 deprecated the bundled "seaborn-*" style names in favour of
# "seaborn-v0_8-*"; use the new name when it exists and fall back to the old
# one on earlier releases.
dark_style = "seaborn-v0_8-dark" if "seaborn-v0_8-dark" in plt.style.available else "seaborn-dark"
plt.style.use(dark_style)
x = df.per_capita_income
y = df.county
plt.figure(figsize=(8, 14), dpi=80)
# One marker per county, joined by a dotted line in the same colour.
plt.scatter(x, y, color="#00DB16", alpha=1, s=100, edgecolors="#d3d3d3", label="Per Capita Income (USD)")
plt.plot(x, y, color="#00DB16", linestyle="dotted")
plt.grid(color="#d3d3d3", linestyle="-", linewidth=0.25)
plt.title("Pennsylvania Per Capita Income by County")
plt.xlabel("Per Capita Income (dollars)")
plt.ylabel("")
plt.legend(loc=1)
plt.show()
/var/folders/2r/y399hm9d0hv3ysx7v92kl9l80000gn/T/ipykernel_64870/3704097254.py:4: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.
# Share of commuting-age workers who worked from home, mapped by county.
wfh_share = df["worked_from_home"] / df["total_workers_commute"]
df["worked_from_home_percentage"] = wfh_share * 100

fig = px.choropleth_mapbox(
    df,
    geojson=counties,
    locations="fips",
    color="worked_from_home_percentage",
    color_continuous_scale="Viridis",
    mapbox_style="carto-darkmatter",
    zoom=6.25,
    center={"lat": 41, "lon": -77.65},
    hover_name="county",
    labels={"worked_from_home_percentage": "% Working From Home"},
)
# Remove the outer margins so the map fills the figure, and apply the dark template.
fig.update_layout(margin={side: 0 for side in "rtlb"}, template="plotly_dark")
fig.show()
# Share of the civilian population with health insurance coverage, mapped by county.
coverage_share = df["civ_health_insurance_coverage"] / df["total_civ_population"]
df["civ_health_insurance_coverage_percentage"] = coverage_share * 100

fig = px.choropleth_mapbox(
    df,
    geojson=counties,
    locations="fips",
    color="civ_health_insurance_coverage_percentage",
    color_continuous_scale="Picnic",
    mapbox_style="carto-darkmatter",
    zoom=6.25,
    center={"lat": 41, "lon": -77.65},
    hover_name="county",
    labels={"civ_health_insurance_coverage_percentage": "Percentage w/ Health Insurance 🏥"},
)
# Remove the outer margins so the map fills the figure, and apply the dark template.
fig.update_layout(margin={side: 0 for side in "rtlb"}, template="plotly_dark")
fig.show()
#!pip install geopandas==0.8.1
#!pip install pyshp==1.2.10
#!pip install shapely==1.6.3
#import plotly.figure_factory as ff
#fig = ff.create_choropleth(fips=df.fips,
# scope=['PA'],
# values=df.total_population,
# title='PA total population by County',
# legend_title='')
#fig.layout.template = None
#fig.show()
#import plotly.figure_factory as ff
#values = range(len(df.fips))
#fig = ff.create_choropleth(fips=df.fips, values=values)
#fig.layout.template = None
#fig.show()